# -*- coding: utf-8 -*-

# FILE: LiteralDataFilter.py
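# DESC: Data filter that replaces every character other than letters, digits
#       and basic punctuation with a space, then normalizes whitespace.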

from DataFilter import DataFilter
from infra.logging import logger

import re

class LiteralDataFilter(DataFilter):
	"""Data filter that reduces a document to plain literal text.

	Only the following characters survive filtering: the space, the
	punctuation marks , ; : . ! ? - ' `, the ASCII letters and digits, and
	the accented letters enumerated in the pattern below. Every other
	character (including tabs and line breaks) is turned into a space.
	"""

	# Matches a single character that is NOT in the allowed set.
	# Raw string: "\-" must reach the regex engine as an escaped hyphen and
	# must not be treated as a (invalid) string escape sequence.
	# NOTE(review): lowercase "ë" is allowed but uppercase "Ë" is not --
	# possibly an oversight in the original list; confirm before changing.
	_DISALLOWED_CHAR = re.compile(r"[^ ,;:.!?\-'`A-ZÁÉÍĹÓŔÚÝÀÈÒÙÂÊÎÔÛŠŽČŇĚŘŤĎĽÄÖÜÑÃÕÅŮÇa-záéíĺóŕúýàèòùâêîôûšžčňěřťďľäöëüñãõåůç0-9]")

	# Matches a run of two or more whitespace characters.
	_WHITESPACE_RUN = re.compile(r"\s{2,}")

	def filter(self, doc):
		"""Return `doc` with disallowed characters removed and spacing normalized.

		Steps:
		  1. Replace every disallowed character with a single space.
		  2. Collapse each run of two or more whitespace characters into
		     one space.
		  3. Remove leading and trailing spaces.

		The original and the filtered text are logged at debug level.

		doc    -- the text to filter.
		return -- the filtered text.
		"""
		log = logger.getLogger()
		sdoc = self._DISALLOWED_CHAR.sub(" ", doc)
		sdoc = self._WHITESPACE_RUN.sub(" ", sdoc)
		# After step 1 the only whitespace left in the text is the plain
		# space, so stripping spaces is equivalent to the former regex-based
		# trimming ("^\s+" and "(.*?)\s+$") while running in linear time; the
		# lazy "(.*?)\s+$" pattern degraded to quadratic time on text without
		# trailing whitespace.
		sdoc = sdoc.strip(" ")
		log.debug("before: \"%s\", after: \"%s\"" % (doc, sdoc))
		return sdoc
